OpenSidewalkMap Statistics

About: this statistics are compilated with Sidewalks,Crossings and Kerbs data.

All the code is kept here, so anyone can reproduce!


Scroll down and the charts will begin to appear, they we're made with the amazing Altair library, that enables interactivity


Sidewalks Statistics
Crossings Statistics
Kerbs Statistics

currently it's only optimized for desktop

The Jupyter Notebook is avaliable Here

In [1]:
from datetime import datetime 

now = datetime.now()
dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
print('Last Update: ',dt_string)
Last Update:  17/06/2023 13:43:32
In [2]:
import geopandas as gpd
import pandas as pd
import altair as alt
alt.data_transformers.disable_max_rows()
Out[2]:
DataTransformerRegistry.enable('default')
In [3]:
def get_count_df(input_df,fieldname,str_to_append=' type'):
    outfieldname = fieldname+str_to_append
    return input_df[fieldname].value_counts().reset_index().rename(columns={'index':outfieldname,fieldname:'count'}).sort_values(by='count',ascending=False),outfieldname

def create_barchart(input_df,fieldname,title,str_to_append=' type',title_fontsize=24,tooltip='count',x_sort='-y',tooltip_list=['percent']):
    # bind = alt.selection_interval(bind='scales')
    # .add_selection(bind)

    data_to_plot,fieldname_v2 = get_count_df(input_df,fieldname,str_to_append)

    feat_count = float(data_to_plot['count'].sum())

    def compute_formatted_percent(featureval):
        return str(round((featureval/feat_count)*100,2))+"%"

    data_to_plot['percent'] = data_to_plot['count'].apply(compute_formatted_percent)

    return alt.Chart(data_to_plot,title=title).mark_bar().encode(
        x=alt.X(fieldname_v2,sort=x_sort),
        y='count',
        tooltip=tooltip_list,
    ).properties(
    width=650,
    height=300).configure_title(fontSize=title_fontsize).interactive()

def create_barchartV2(input_gdf,fieldname,title,str_to_append=' type',title_fontsize=24,len_field='length(km)'):

    # bind = alt.selection_interval(bind='scales')
    # .add_selection(bind)

    fieldname_v2 = fieldname+str_to_append

    data_to_plot = input_gdf[[len_field,fieldname]].groupby([fieldname]).agg({fieldname:'count',len_field:'sum'}).rename(columns={fieldname:'feature count'}).reset_index().rename(columns={fieldname:fieldname_v2})

    return alt.Chart(data_to_plot,title=title).mark_bar().encode(
        x=alt.X(fieldname_v2,sort='-y'),
        y=len_field,
        tooltip=len_field,
        color='feature count'
    ).properties(
    width=650,
    height=300).configure_title(fontSize=title_fontsize).interactive()

def print_relevant_columnames(input_df,not_include=('score','geometry','type','id')):
    print(*[f'{column}, ' for column in input_df.columns if not any(word in column for word in not_include)])

def return_weblink(string_id,type='way'):
    return f"<a href=https://www.openstreetmap.org/{type}/{string_id}>{string_id}</a>"

def get_year_surveydate(featuredate):
    return featuredate.split('-')[0]
    

SIDEWALKS STATISTICS¶

In [4]:
sidewalks_gdf = gpd.read_file('../data/sidewalks.geojson')
utm_crs = sidewalks_gdf.estimate_utm_crs()
# sidewalks_data = pd.DataFrame(sidewalks_gdf)
In [5]:
# compute lengths only once:
sidewalks_gdf['length(km)'] = sidewalks_gdf.to_crs(utm_crs).length/1000

sidewalks_gdf['weblink'] = sidewalks_gdf['id'].astype('string').apply(return_weblink)

sidewalks_gdf['Year of Survey'] = sidewalks_gdf['survey:date'].apply(get_year_surveydate)
In [6]:
# sidewalk Length Statistics
sidewalks_gdf['length(km)'].describe()
Out[6]:
count    7762.000000
mean        0.036980
std         0.083621
min         0.000553
25%         0.014339
50%         0.022071
75%         0.030271
max         2.855776
Name: length(km), dtype: float64

printing relevant columns on the data:

In [7]:
print_relevant_columnames(sidewalks_gdf)
highway,  crossing,  traffic_signals,  bicycle,  mapillary,  survey:date,  wheelchair,  kerb,  barrier,  access,  lit,  foot,  addr:city,  addr:housenumber,  addr:street,  addr:suburb,  name,  tactile_paving,  surface,  smoothness,  footway,  paving_stones,  level,  building,  covered,  lcn,  motor_vehicle,  segregated,  horse,  oneway,  maxspeed,  layer,  source,  leisure,  tunnel,  incline,  cutting,  embankment,  dog,  cycleway,  cycleway:right,  ramp,  noname,  amenity,  religion,  opening_hours,  alt_name,  handrail,  ramp:wheelchair,  step_count,  indoor,  surface:note,  description,  lanes,  check_date:surface,  incline:across,  last_update,  update_date,  length(km),  weblink,  Year of Survey, 
In [8]:
create_barchartV2(sidewalks_gdf,'surface','Sidewalks Surface Type',title_fontsize=24)
Out[8]:
In [9]:
create_barchartV2(sidewalks_gdf,'smoothness','Sidewalks Smoothness Level',title_fontsize=24)
Out[9]:
In [10]:
create_barchartV2(sidewalks_gdf,'tactile_paving','Sidewalks Tactile Paving Presence',title_fontsize=24)
Out[10]:
In [11]:
create_barchartV2(sidewalks_gdf,'width','Sidewalks Width Values',title_fontsize=24)
Out[11]:
In [12]:
create_barchartV2(sidewalks_gdf,'incline','Sidewalks Incline Values',title_fontsize=24)
Out[12]:
In [13]:
def double_scatter_bar(input_df,title,xs='surface',ys='smoothness',scolor=None,xh='count()',yh1='surface',yh2='smoothness',hcolor=None,fontsize=24,tooltip_fields=['element_type','id']):

    interval = alt.selection_interval()

    default_color = alt.value('lightseagreen')

    if not hcolor:
        hcolor = default_color

    if not scolor:
        scolor = default_color


    scatter = alt.Chart(input_df,title=title).mark_point().encode(
        x=xs,
        y=ys,
        color=scolor,
        tooltip=alt.Tooltip(tooltip_fields),
    ).properties(
    width=600,
    height=350,).add_selection(interval)

    hist_base = alt.Chart(input_df).mark_bar().encode(
        x=xh,
        color=hcolor,
        tooltip=alt.Tooltip(tooltip_fields),
        

    ).properties(
        width=300,
        height=220,
    ).transform_filter(
        interval,
    )

    # if hcolor:
    #      hist_base.encode(color=hcolor)

    hist = hist_base.encode(y=yh1) | hist_base.encode(y=yh2)

    return (scatter & hist).configure_title(fontSize=fontsize,align='center')

# 'Surface x Smoothness'
In [14]:
sidewalks_gdf['element_type'].unique()
Out[14]:
array(['way'], dtype=object)
In [15]:
double_scatter_bar(sidewalks_gdf,'Surface x Smoothness (sidewalks)',hcolor='length(km)')
Out[15]:
In [16]:
sidewalks_gdf['surface'].unique()
Out[16]:
array(['?', 'cobblestone', 'asphalt', 'wood', 'concrete', 'paved',
       'concrete:plates', 'sett', 'rock', 'paving_stones', 'ground',
       'grass', 'dirt', 'compacted', 'earth', 'pebblestone',
       'fine_gravel', 'unpaved', 'concrete:lanes', 'unhewn_cobblestone',
       'ceramic:tiles', 'gravel', 'stepping_stones', 'grass_paver',
       'paver'], dtype=object)
In [17]:
create_barchart(sidewalks_gdf,'Year of Survey','Year of Survey Image (sidewalks)')
Out[17]:
In [18]:
# updating info:
sidewalks_updating = pd.read_json('../data/sidewalks_versioning.json')
crossings_updating = pd.read_json('../data/crossings_versioning.json')
kerbs_updating = pd.read_json('../data/kerbs_versioning.json')

updating_dict = {'sidewalks':sidewalks_updating,'crossings':crossings_updating,'kerbs':kerbs_updating}
In [19]:
# crating month_year field:

for category in updating_dict:

    updating_dict[category]['month_year'] = updating_dict[category]['rev_month'].map("{:02d}".format) + '_' + updating_dict[category]['rev_year'].astype(str)

    updating_dict[category]['year_month'] =  updating_dict[category]['rev_year'].astype(str) + "_" + updating_dict[category]['rev_month'].map("{:02d}".format)


    updating_dict[category].sort_values('year_month',inplace=True)
In [20]:
print_relevant_columnames(sidewalks_updating,not_include=[])
osmid,  rev_day,  rev_month,  rev_year,  n_revs,  month_year,  year_month, 
In [21]:
create_barchart(sidewalks_updating,'year_month','Year and Month Of Update (Sidewalks)',x_sort='-x')
Out[21]:
In [22]:
create_barchart(sidewalks_updating,'n_revs','Number Of Revisions (Sidewalks)',x_sort='-x')
Out[22]:

CROSSINGS STATISTICS¶

In [23]:
crossings_gdf = gpd.read_file('../data/crossings.geojson')
# crossings_data = pd.DataFrame(crossings_gdf)
In [24]:
# compute lengths only once:
crossings_gdf['length(km)'] = crossings_gdf.to_crs(utm_crs).length/1000

crossings_gdf['weblink'] = crossings_gdf['id'].astype('string').apply(return_weblink)

crossings_gdf['Year of Survey'] = crossings_gdf['survey:date'].apply(get_year_surveydate)
In [25]:
print_relevant_columnames(crossings_gdf)
highway,  crossing,  lit,  wheelchair,  bicycle,  mapillary,  survey:date,  traffic_calming,  kerb,  footway,  crossing:island,  tactile_paving,  name,  horse,  access,  smoothness,  surface,  foot,  segregated,  layer,  oneway,  note,  motor_vehicle,  incline,  lcn,  alt_name,  lanes,  level,  cycleway,  cycleway:right,  crossing:markings,  construction,  last_update,  update_date,  length(km),  weblink,  Year of Survey, 
In [26]:
create_barchart(crossings_gdf,'crossing','Crossing Type')
Out[26]:
In [27]:
create_barchart(crossings_gdf,'surface','Crossing Surface')
Out[27]:
In [28]:
double_scatter_bar(crossings_gdf,'Surface x Smoothness (crossings)',
hcolor='crossing',
# scolor='crossing',
)
Out[28]:
In [29]:
create_barchart(crossings_gdf,'Year of Survey','Year of Survey Image (crossings)')
Out[29]:
In [30]:
create_barchart(crossings_updating,'year_month','Year and Month Of Update (Crossings)',x_sort='-x')
Out[30]:
In [31]:
create_barchart(crossings_updating,'n_revs','Number Of Revisions (Crossings)',x_sort='-x')
Out[31]:

KERBS STATISTICS¶

In [32]:
kerbs_gdf = gpd.read_file('../data/kerbs.geojson')
# kerbs_data = pd.DataFrame(kerbs_gdf)
In [33]:
kerbs_gdf['Year of Survey'] = kerbs_gdf['survey:date'].apply(get_year_surveydate)
In [34]:
print_relevant_columnames(kerbs_gdf)
crossing,  crossing_ref,  highway,  kerb,  tactile_paving,  traffic_calming,  traffic_signals,  bicycle,  mapillary,  survey:date,  wheelchair,  button_operated,  traffic_signals:sound,  traffic_signals:vibration,  crossing:island,  image,  barrier,  surface,  kerb:height,  smoothness,  description,  footway,  name,  landuse,  oneway,  lit,  last_update,  update_date,  Year of Survey, 
In [35]:
create_barchart(kerbs_gdf,'kerb','Kerb Type')
Out[35]:
In [36]:
create_barchart(kerbs_gdf,'tactile_paving','Kerb Tactile Paving Presence')
Out[36]:
In [37]:
create_barchart(kerbs_gdf,'wheelchair','Kerb Wheelchair Acessibility')
Out[37]:
In [38]:
create_barchart(kerbs_gdf,'kerb:height','Kerb Height')
Out[38]:
In [39]:
create_barchart(kerbs_gdf,'Year of Survey','Year of Survey Image (kerbs)')
Out[39]:
In [40]:
double_scatter_bar(kerbs_gdf,'Kerb x Tactile Paving x Wheelchair Acess.',xs='kerb',ys='tactile_paving',yh1='kerb',yh2='tactile_paving',xh='count()',hcolor='wheelchair')
Out[40]:
In [41]:
create_barchart(kerbs_updating,'year_month','Year and Month Of Update (Kerbs)',x_sort='-x')
Out[41]:
In [42]:
create_barchart(kerbs_updating,'n_revs','Number Of Revisions (Kerbs)',x_sort='-x')
Out[42]:
In [43]:
import sys
sys.path.append('..')
In [46]:
from time import sleep
from oswm_codebase.functions import record_datetime,gen_updating_infotable_page

# to record data aging:
record_datetime('Statistical Charts','../data/last_updated.json')
sleep(.1)

# generate the "report" of the updating info
# gen_updating_infotable_page('../data/data_updating.html','../data/last_updated.json')
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
/tmp/ipykernel_20423/2240378869.py in <module>
      1 from time import sleep
----> 2 from oswm_codebase.functions import record_datetime,gen_updating_infotable_page
      3 
      4 # to record data aging:
      5 record_datetime('Statistical Charts','../data/last_updated.json')

~/opensidewalkmap_beta/statistics/../oswm_codebase/functions.py in <module>
     78 <style>
     79 
---> 80     {file_as_string('oswm_codebase/assets/styles/font_styles.css')}
     81 
     82 </style>

~/opensidewalkmap_beta/statistics/../oswm_codebase/functions.py in file_as_string(inputpath)
     25 
     26 def file_as_string(inputpath:str):
---> 27     with open(inputpath) as reader:
     28         return reader.read()
     29 

FileNotFoundError: [Errno 2] No such file or directory: 'oswm_codebase/assets/styles/font_styles.css'
In [ ]:
!jupyter nbconvert --to html statistics.ipynb
[NbConvertApp] Converting notebook statistics.ipynb to html
[NbConvertApp] Writing 5558277 bytes to statistics.html